Run download_data.Rmd and
percentage_of_regional_richness.Rmd First!
library(randomForest)
library(reshape2)
library(rpart)
library(ggplot2)
library(tidyverse)
library(multcomp)
library(car)
library(VSURF)
library(boot)
city_data
length(city_data$city_gdp_per_population[!is.na(city_data$city_gdp_per_population)])
[1] 30
length(city_data$percentage_urban_area_as_open_public_spaces[!is.na(city_data$percentage_urban_area_as_open_public_spaces)])
[1] 61
length(city_data$happiness_future_life[!is.na(city_data$happiness_future_life)])
[1] 65
length(city_data$mean_population_exposure_to_pm2_5_2019[!is.na(city_data$mean_population_exposure_to_pm2_5_2019)])
[1] 131
fetch_city_data_for <- function(pool_name, include_city_name = F) {
results_filename <- paste(paste('percentage_of_regional_richness__output_', pool_name, 'city', 'richness', 'intercept', sep = "_"), "csv", sep = ".")
results <- read_csv(results_filename)
joined <- left_join(city_data, results)
joined$abs_city_centre_latitude = abs(joined$city_centre_latitude)
required_columns <- c("population_growth", "rainfall_monthly_min", "rainfall_annual_average", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "temperature_monthly_max", "happiness_negative_effect", "happiness_positive_effect", "happiness_future_life", "number_of_biomes", "realm", "biome_name", "region_20km_includes_estuary", "region_50km_includes_estuary", "region_100km_includes_estuary", "city_includes_estuary", "region_20km_average_pop_density", "region_50km_average_pop_density", "region_100km_average_pop_density", "city_max_pop_density", "city_average_pop_density", "mean_population_exposure_to_pm2_5_2019", "region_20km_cultivated", "region_20km_urban", "region_50km_cultivated", "region_50km_urban", "region_100km_cultivated", "region_100km_urban", "region_20km_elevation_delta", "region_20km_mean_elevation", "region_50km_elevation_delta", "region_50km_mean_elevation", "region_100km_elevation_delta", "region_100km_mean_elevation", "city_elevation_delta", "city_mean_elevation", "urban", "shrubs", "permanent_water", "open_forest", "herbaceous_wetland", "herbaceous_vegetation", "cultivated", "closed_forest", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_streets", "percentage_urban_area_as_open_public_spaces_and_streets", "percentage_urban_area_as_open_public_spaces", "city_gdp_per_population", "city_ndvi", "city_ssm", "city_susm", "region_20km_ndvi", "region_20km_ssm", "region_20km_susm", "region_50km_ndvi", "region_50km_ssm", "region_50km_susm", "region_100km_ndvi", "region_100km_ssm", "region_100km_susm", "city_percentage_protected", "region_20km_percentage_protected", "region_50km_percentage_protected", "region_100km_percentage_protected", "city_centre_latitude", "abs_city_centre_latitude")
if (include_city_name) {
required_columns <- append(c("name"), required_columns)
}
required_columns <- append(c("response"), required_columns)
joined[,required_columns]
}
| Merlin Response |
merlin_city_data <- fetch_city_data_for('merlin')
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
merlin_city_data
merlin_city_data_fixed <- rfImpute(response ~ ., merlin_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 20.86 115.69 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.33 118.34 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.44 118.95 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.19 117.56 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 21.05 116.78 |
merlin_city_data_fixed
ggplot(merlin_city_data_fixed, aes(response)) + geom_histogram(binwidth = 2)
names(merlin_city_data_fixed)
[1] "response" "population_growth"
[3] "rainfall_monthly_min" "rainfall_annual_average"
[5] "rainfall_monthly_max" "temperature_annual_average"
[7] "temperature_monthly_min" "temperature_monthly_max"
[9] "happiness_negative_effect" "happiness_positive_effect"
[11] "happiness_future_life" "number_of_biomes"
[13] "realm" "biome_name"
[15] "region_20km_includes_estuary" "region_50km_includes_estuary"
[17] "region_100km_includes_estuary" "city_includes_estuary"
[19] "region_20km_average_pop_density" "region_50km_average_pop_density"
[21] "region_100km_average_pop_density" "city_max_pop_density"
[23] "city_average_pop_density" "mean_population_exposure_to_pm2_5_2019"
[25] "region_20km_cultivated" "region_20km_urban"
[27] "region_50km_cultivated" "region_50km_urban"
[29] "region_100km_cultivated" "region_100km_urban"
[31] "region_20km_elevation_delta" "region_20km_mean_elevation"
[33] "region_50km_elevation_delta" "region_50km_mean_elevation"
[35] "region_100km_elevation_delta" "region_100km_mean_elevation"
[37] "city_elevation_delta" "city_mean_elevation"
[39] "urban" "shrubs"
[41] "permanent_water" "open_forest"
[43] "herbaceous_wetland" "herbaceous_vegetation"
[45] "cultivated" "closed_forest"
[47] "share_of_population_within_400m_of_open_space" "percentage_urban_area_as_streets"
[49] "percentage_urban_area_as_open_public_spaces_and_streets" "percentage_urban_area_as_open_public_spaces"
[51] "city_gdp_per_population" "city_ndvi"
[53] "city_ssm" "city_susm"
[55] "region_20km_ndvi" "region_20km_ssm"
[57] "region_20km_susm" "region_50km_ndvi"
[59] "region_50km_ssm" "region_50km_susm"
[61] "region_100km_ndvi" "region_100km_ssm"
[63] "region_100km_susm" "city_percentage_protected"
[65] "region_20km_percentage_protected" "region_50km_percentage_protected"
[67] "region_100km_percentage_protected" "city_centre_latitude"
[69] "abs_city_centre_latitude"
merlin_response <- merlin_city_data_fixed$response
merlin_predictors <- merlin_city_data_fixed[,-1]
merlin_predictors
merlin_interp <- VSURF(x = merlin_predictors, y = merlin_response)
Thresholding step
Estimated computational time (on one core): 115.2 sec.
|
| | 0%
|
|=== | 2%
|
|====== | 4%
|
|========= | 6%
|
|=========== | 8%
|
|============== | 10%
|
|================= | 12%
|
|==================== | 14%
|
|======================= | 16%
|
|========================== | 18%
|
|============================= | 20%
|
|=============================== | 22%
|
|================================== | 24%
|
|===================================== | 26%
|
|======================================== | 28%
|
|=========================================== | 30%
|
|============================================== | 32%
|
|================================================= | 34%
|
|=================================================== | 36%
|
|====================================================== | 38%
|
|========================================================= | 40%
|
|============================================================ | 42%
|
|=============================================================== | 44%
|
|================================================================== | 46%
|
|===================================================================== | 48%
|
|======================================================================== | 50%
|
|========================================================================== | 52%
|
|============================================================================= | 54%
|
|================================================================================ | 56%
|
|=================================================================================== | 58%
|
|====================================================================================== | 60%
|
|========================================================================================= | 62%
|
|============================================================================================ | 64%
|
|============================================================================================== | 66%
|
|================================================================================================= | 68%
|
|==================================================================================================== | 70%
|
|======================================================================================================= | 72%
|
|========================================================================================================== | 74%
|
|============================================================================================================= | 76%
|
|================================================================================================================ | 78%
|
|================================================================================================================== | 80%
|
|===================================================================================================================== | 82%
|
|======================================================================================================================== | 84%
|
|=========================================================================================================================== | 86%
|
|============================================================================================================================== | 88%
|
|================================================================================================================================= | 90%
|
|==================================================================================================================================== | 92%
|
|====================================================================================================================================== | 94%
|
|========================================================================================================================================= | 96%
|
|============================================================================================================================================ | 98%
|
|===============================================================================================================================================| 100%
Interpretation step (on 34 variables)
Estimated computational time (on one core): between 43.3 sec. and 232.9 sec.
|
| | 0%
|
|==== | 3%
|
|======== | 6%
|
|============= | 9%
|
|================= | 12%
|
|===================== | 15%
|
|========================= | 18%
|
|============================= | 21%
|
|================================== | 24%
|
|====================================== | 26%
|
|========================================== | 29%
|
|============================================== | 32%
|
|================================================== | 35%
|
|======================================================= | 38%
|
|=========================================================== | 41%
|
|=============================================================== | 44%
|
|=================================================================== | 47%
|
|======================================================================== | 50%
|
|============================================================================ | 53%
|
|================================================================================ | 56%
|
|==================================================================================== | 59%
|
|======================================================================================== | 62%
|
|============================================================================================= | 65%
|
|================================================================================================= | 68%
|
|===================================================================================================== | 71%
|
|========================================================================================================= | 74%
|
|============================================================================================================= | 76%
|
|================================================================================================================== | 79%
|
|====================================================================================================================== | 82%
|
|========================================================================================================================== | 85%
|
|============================================================================================================================== | 88%
|
|================================================================================================================================== | 91%
|
|======================================================================================================================================= | 94%
|
|=========================================================================================================================================== | 97%
|
|===============================================================================================================================================| 100%
Prediction step (on 3 variables)
Maximum estimated computational time (on one core): 3.8 sec.
|
| | 0%
|
|================================================ | 33%
|
|=============================================================================================== | 67%
|
|===============================================================================================================================================| 100%
names(merlin_predictors[,merlin_interp$varselect.interp])
[1] "abs_city_centre_latitude" "region_50km_elevation_delta" "biome_name"
| Birdlife Response |
birdlife_city_data <- fetch_city_data_for('birdlife')
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
birdlife_city_data
ggplot(birdlife_city_data, aes(response)) + geom_histogram(binwidth = 1)
birdlife_city_data_fixed <- rfImpute(response ~ ., birdlife_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.97 94.50 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.855 92.68 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.676 89.85 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.745 90.94 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.587 88.44 |
birdlife_city_data_fixed
names(birdlife_city_data_fixed)
[1] "response" "population_growth"
[3] "rainfall_monthly_min" "rainfall_annual_average"
[5] "rainfall_monthly_max" "temperature_annual_average"
[7] "temperature_monthly_min" "temperature_monthly_max"
[9] "happiness_negative_effect" "happiness_positive_effect"
[11] "happiness_future_life" "number_of_biomes"
[13] "realm" "biome_name"
[15] "region_20km_includes_estuary" "region_50km_includes_estuary"
[17] "region_100km_includes_estuary" "city_includes_estuary"
[19] "region_20km_average_pop_density" "region_50km_average_pop_density"
[21] "region_100km_average_pop_density" "city_max_pop_density"
[23] "city_average_pop_density" "mean_population_exposure_to_pm2_5_2019"
[25] "region_20km_cultivated" "region_20km_urban"
[27] "region_50km_cultivated" "region_50km_urban"
[29] "region_100km_cultivated" "region_100km_urban"
[31] "region_20km_elevation_delta" "region_20km_mean_elevation"
[33] "region_50km_elevation_delta" "region_50km_mean_elevation"
[35] "region_100km_elevation_delta" "region_100km_mean_elevation"
[37] "city_elevation_delta" "city_mean_elevation"
[39] "urban" "shrubs"
[41] "permanent_water" "open_forest"
[43] "herbaceous_wetland" "herbaceous_vegetation"
[45] "cultivated" "closed_forest"
[47] "share_of_population_within_400m_of_open_space" "percentage_urban_area_as_streets"
[49] "percentage_urban_area_as_open_public_spaces_and_streets" "percentage_urban_area_as_open_public_spaces"
[51] "city_gdp_per_population" "city_ndvi"
[53] "city_ssm" "city_susm"
[55] "region_20km_ndvi" "region_20km_ssm"
[57] "region_20km_susm" "region_50km_ndvi"
[59] "region_50km_ssm" "region_50km_susm"
[61] "region_100km_ndvi" "region_100km_ssm"
[63] "region_100km_susm" "city_percentage_protected"
[65] "region_20km_percentage_protected" "region_50km_percentage_protected"
[67] "region_100km_percentage_protected" "city_centre_latitude"
[69] "abs_city_centre_latitude"
birdlife_response <- birdlife_city_data_fixed$response
birdlife_predictors <- birdlife_city_data_fixed[,-1]
birdlife_predictors
birdlife_interp <- VSURF(x = birdlife_predictors, y = birdlife_response)
Thresholding step
Estimated computational time (on one core): 103.7 sec.
|
| | 0%
|
|=== | 2%
|
|====== | 4%
|
|========= | 6%
|
|=========== | 8%
|
|============== | 10%
|
|================= | 12%
|
|==================== | 14%
|
|======================= | 16%
|
|========================== | 18%
|
|============================= | 20%
|
|=============================== | 22%
|
|================================== | 24%
|
|===================================== | 26%
|
|======================================== | 28%
|
|=========================================== | 30%
|
|============================================== | 32%
|
|================================================= | 34%
|
|=================================================== | 36%
|
|====================================================== | 38%
|
|========================================================= | 40%
|
|============================================================ | 42%
|
|=============================================================== | 44%
|
|================================================================== | 46%
|
|===================================================================== | 48%
|
|======================================================================== | 50%
|
|========================================================================== | 52%
|
|============================================================================= | 54%
|
|================================================================================ | 56%
|
|=================================================================================== | 58%
|
|====================================================================================== | 60%
|
|========================================================================================= | 62%
|
|============================================================================================ | 64%
|
|============================================================================================== | 66%
|
|================================================================================================= | 68%
|
|==================================================================================================== | 70%
|
|======================================================================================================= | 72%
|
|========================================================================================================== | 74%
|
|============================================================================================================= | 76%
|
|================================================================================================================ | 78%
|
|================================================================================================================== | 80%
|
|===================================================================================================================== | 82%
|
|======================================================================================================================== | 84%
|
|=========================================================================================================================== | 86%
|
|============================================================================================================================== | 88%
|
|================================================================================================================================= | 90%
|
|==================================================================================================================================== | 92%
|
|====================================================================================================================================== | 94%
|
|========================================================================================================================================= | 96%
|
|============================================================================================================================================ | 98%
|
|===============================================================================================================================================| 100%
Interpretation step (on 49 variables)
Estimated computational time (on one core): between 66.2 sec. and 428.7 sec.
|
| | 0%
|
|=== | 2%
|
|====== | 4%
|
|========= | 6%
|
|============ | 8%
|
|=============== | 10%
|
|================== | 12%
|
|==================== | 14%
|
|======================= | 16%
|
|========================== | 18%
|
|============================= | 20%
|
|================================ | 22%
|
|=================================== | 24%
|
|====================================== | 27%
|
|========================================= | 29%
|
|============================================ | 31%
|
|=============================================== | 33%
|
|================================================== | 35%
|
|===================================================== | 37%
|
|======================================================= | 39%
|
|========================================================== | 41%
|
|============================================================= | 43%
|
|================================================================ | 45%
|
|=================================================================== | 47%
|
|====================================================================== | 49%
|
|========================================================================= | 51%
|
|============================================================================ | 53%
|
|=============================================================================== | 55%
|
|================================================================================== | 57%
|
|===================================================================================== | 59%
|
|======================================================================================== | 61%
|
|========================================================================================== | 63%
|
|============================================================================================= | 65%
|
|================================================================================================ | 67%
|
|=================================================================================================== | 69%
|
|====================================================================================================== | 71%
|
|========================================================================================================= | 73%
|
|============================================================================================================ | 76%
|
|=============================================================================================================== | 78%
|
|================================================================================================================== | 80%
|
|===================================================================================================================== | 82%
|
|======================================================================================================================== | 84%
|
|=========================================================================================================================== | 86%
|
|============================================================================================================================= | 88%
|
|================================================================================================================================ | 90%
|
|=================================================================================================================================== | 92%
|
|====================================================================================================================================== | 94%
|
|========================================================================================================================================= | 96%
|
|============================================================================================================================================ | 98%
|
|===============================================================================================================================================| 100%
Prediction step (on 2 variables)
Maximum estimated computational time (on one core): 2.6 sec.
|
| | 0%
|
|======================================================================== | 50%
|
|===============================================================================================================================================| 100%
names(birdlife_predictors[,birdlife_interp$varselect.interp])
[1] "population_growth" "region_50km_ssm"
| So…. |
|---|
| Merlin: “abs_city_centre_latitude” “region_50km_elevation_delta” “biome_name” “region_50km_ssm” [5] “region_100km_ssm” “region_20km_elevation_delta” “region_20km_urban” “region_100km_elevation_delta” [9] “temperature_annual_average” “city_ndvi” “city_gdp_per_population” “shrubs” [13] “permanent_water” “city_centre_latitude” “region_20km_cultivated” Birdlife: “population_growth” “region_50km_ssm” |
merlin_city_data_named <- fetch_city_data_for('merlin', T)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
birdlife_city_data_named <- fetch_city_data_for('birdlife', T)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
| Use cross validation and dropping terms to find best model |
full model: response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated
merlin_city_data_fixed_no_boreal <- merlin_city_data_fixed[merlin_city_data_fixed$biome_name != 'Boreal Forests/Taiga',]
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.75796
– CVE 19.72841 – Can we drop one?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.82035
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.43811
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.66906
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.69843
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.50033
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.34947
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.74441
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + permanent_water + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.60391
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + city_centre_latitude + region_20km_cultivated))$delta[1]
[1] 19.52736
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated))$delta[1]
[1] 19.4595
cv.glm(merlin_city_data_fixed_no_boreal, glm(data = merlin_city_data_fixed_no_boreal, formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_ndvi + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude))$delta[1]
[1] 19.51437
– drop city_ndvi to give smaller CVE of 19.35 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.53271
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.03886
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.27071
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.10284
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.31519
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + permanent_water + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.20544
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + city_centre_latitude + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.15889
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.06013
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + city_centre_latitude, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.09865
– drop city_centre_latitude to give smaller CVE of 19.06 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.26209
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.76303
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.9901
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.00448
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.83634
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 19.02621
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.92643
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.87574
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_elevation_delta + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.82918
– drop region_20km_elevation_delta to give smaller CVE of 18.76 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.92774
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.67653
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.72593
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.54244
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.72148
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + permanent_water + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.63283
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + region_20km_cultivated, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.57081
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.53977
– drop region_20km_cultivated to give smaller CVE of 18.54 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.7919
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.44537
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + temperature_annual_average + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.52783
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.32598
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.48533
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + permanent_water, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.43578
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.34359
– drop permanent_water to give smaller CVE of 18.34 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.52597
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.27261
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + temperature_annual_average + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.37099
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.14149
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.33385
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + temperature_annual_average + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.25196
– drop temperature_annual_average to give smaller CVE of 18.14 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_20km_urban + region_100km_elevation_delta + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.32727
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.03948
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.1543
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.14642
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_20km_urban + region_100km_elevation_delta + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.0526
– drop region_20km_urban to give smaller CVE of 18.03 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_elevation_delta + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.34434
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + city_gdp_per_population + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.04783
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + shrubs, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.01086
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 17.95031
– drop shrubs to give smaller CVE of 17.95 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_elevation_delta + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.24369
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + city_gdp_per_population, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.0219
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 17.94321
– drop city_gdp_per_population to give smaller CVE of 17.94 – can we drop another?
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_elevation_delta, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.06258
cv.glm(merlin_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm, data = merlin_city_data_fixed_no_boreal))$delta[1]
[1] 18.19515
– No
| – best model with region_100km_ssm + region_100km_elevation_delta (CV error 17.91216) |
summary(glm(data = merlin_city_data_fixed, formula = response ~ region_100km_ssm + region_100km_elevation_delta))
Call:
glm(formula = response ~ region_100km_ssm + region_100km_elevation_delta,
data = merlin_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.6625 -3.0246 -0.4496 1.9868 16.9640
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.6877517 1.1338124 2.371 0.0192 *
region_100km_ssm -0.1327133 0.0698210 -1.901 0.0595 .
region_100km_elevation_delta -0.0005261 0.0002944 -1.787 0.0762 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 17.46142)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 2339.8 on 134 degrees of freedom
AIC: 785.57
Number of Fisher Scoring iterations: 2
reg_merlin = glm(data = merlin_city_data_fixed, formula = response ~ region_100km_ssm + region_100km_elevation_delta)
with(summary(reg_merlin), 1 - deviance/null.deviance)
[1] 0.05255
birdlife_city_data_fixed_no_boreal <- birdlife_city_data_fixed[birdlife_city_data_fixed$biome_name != 'Boreal Forests/Taiga',]
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.899862
– can we drop a variable?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.768164
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_elevation_delta + biome_name + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.752211
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + biome_name + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.989636
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.503421
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + biome_name, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.780578
– drop biome_name to give CVE of 6.503421 – can we drop another?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + region_50km_elevation_delta + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.417311
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_elevation_delta + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.426562
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.430742
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.439714
– drop region_100km_ssm to give CVE of 6.417311 – can we drop another?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_elevation_delta + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.535285
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.342025
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm + region_50km_elevation_delta, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.352664
– drop region_50km_elevation_delta to give CVE of 6.342025 – can we drop another?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ population_growth, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.464699
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.291299
– drop city_gdp_per_population to give CVE of 6.291299 – is this better than no variable?
cv.glm(birdlife_city_data_fixed_no_boreal, glm(formula = response ~ 1, data = birdlife_city_data_fixed_no_boreal))$delta[1]
[1] 6.395701
– yes, just!
| – so best model with birdlife is region_50km_ssm |
summary(glm(data = birdlife_city_data_fixed, formula = response ~ region_50km_ssm))
Call:
glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.5353 -1.5461 -0.4124 1.3071 10.7572
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.26916 0.65041 1.951 0.0531 .
region_50km_ssm -0.08499 0.04115 -2.065 0.0408 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 6.214378)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 838.94 on 135 degrees of freedom
AIC: 643.06
Number of Fisher Scoring iterations: 2
reg_birdlife = glm(data = birdlife_city_data_fixed, formula = response ~ region_50km_ssm)
with(summary(reg_birdlife), 1 - deviance/null.deviance)
[1] 0.03062471
ggplot(birdlife_city_data_named, aes(x = region_50km_ssm, y = response)) + geom_point() + geom_smooth(method = "glm")
`geom_smooth()` using formula 'y ~ x'
| Check birdlife model fit |
birdlife.fit <- glm(data = birdlife_city_data_fixed, formula = response ~ region_50km_ssm)
summary(birdlife.fit)
Call:
glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_fixed)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.5353 -1.5461 -0.4124 1.3071 10.7572
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.26916 0.65041 1.951 0.0531 .
region_50km_ssm -0.08499 0.04115 -2.065 0.0408 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 6.214378)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 838.94 on 135 degrees of freedom
AIC: 643.06
Number of Fisher Scoring iterations: 2
with(summary(birdlife.fit), 1 - deviance/null.deviance)
[1] 0.03062471
plot(birdlife.fit)
birdlife_city_data_fixed_no_boreal[c(16, 53, 72), c("region_50km_ssm")]
[1] 18.451180 9.961682 11.644862
city_data[c(16, 53, 72), c("name", "region_50km_ssm")]
dat <- predict(glm(formula = response ~ region_50km_ssm, data = birdlife_city_data_named), se.fit=T)
outside_se <- birdlife_city_data_named[birdlife_city_data_named$response < dat$fit - 15* dat$se.fit | birdlife_city_data_named$response > dat$fit + 15 * dat$se.fit,]
ggplot(birdlife_city_data_named, aes(x = region_50km_ssm, y = response)) +
geom_point(size=1) +
geom_smooth(method = "glm") +
geom_text(aes(label = name), data = outside_se, size = 3, position = "dodge", vjust = "inward", hjust = "inward", color = "red", angle=-15) +
geom_point(data = outside_se, color = "red") +
theme_bw() +
ylab("City Random Effect Intercept") + xlab("Regional (50km) SSM") + labs(title = "Birdlife")
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
ggsave("city_effect_richness__output__birdlife.jpg")
Saving 7.29 x 4.51 in image
`geom_smooth()` using formula 'y ~ x'
Warning: Width not defined. Set with `position_dodge(width = ?)`
birdlife_city_data_named$residuals <- resid(birdlife.fit)
ggplot(birdlife_city_data_named, aes(y = response, x = residuals)) +
geom_point() +
geom_smooth(method = "lm", se = F) +
labs(title = "Birdlife", subtitle = paste("Correlation", cor(birdlife_city_data_named$residuals, birdlife_city_data_named$response))) +
theme_bw()
`geom_smooth()` using formula 'y ~ x'
| Check Merlin model fit |
merlin.fit <- glm(data = merlin_city_data_named, formula = response ~ region_100km_ssm + region_50km_elevation_delta)
summary(merlin.fit)
Call:
glm(formula = response ~ region_100km_ssm + region_50km_elevation_delta,
data = merlin_city_data_named)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.6599 -2.9987 -0.5524 1.7449 16.9143
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.6809304 1.1210300 2.391 0.0182 *
region_100km_ssm -0.1331207 0.0695604 -1.914 0.0578 .
region_50km_elevation_delta -0.0006899 0.0003461 -1.994 0.0482 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 17.36262)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 2326.6 on 134 degrees of freedom
AIC: 784.8
Number of Fisher Scoring iterations: 2
with(summary(merlin.fit), 1 - deviance/null.deviance)
[1] 0.05791102
plot(merlin.fit)
merlin_city_data_fixed_no_boreal[c(24, 42, 72), c("region_100km_ssm", "region_50km_elevation_delta")]
city_data[c(24, 42, 72), c("name", "region_100km_ssm", "region_50km_elevation_delta")]
ggplot(merlin_city_data_named, aes(x = region_100km_ssm, y = response)) +
geom_point(aes(size = region_50km_elevation_delta)) +
geom_smooth(method = "glm") +
theme_bw() +
theme(legend.position="bottom") +
ylab("City Random Effect Intercept") + xlab("Regional (100km) SSM") + labs(title = "eBird") + guides(size=guide_legend(title="Regional (50km) Elevation Delta"))
`geom_smooth()` using formula 'y ~ x'
ggsave("city_effect_richness__output__merlin.jpg")
Saving 7.29 x 4.51 in image
`geom_smooth()` using formula 'y ~ x'
merlin_city_data_named$residuals <- resid(merlin.fit)
ggplot(merlin_city_data_named, aes(y = response, x = residuals)) +
geom_point() +
geom_smooth(method = "lm", se = F) +
labs(title = "Merlin", subtitle = paste("Correlation", cor(merlin_city_data_named$residuals, merlin_city_data_named$response))) +
theme_bw()
`geom_smooth()` using formula 'y ~ x'
| Check AIC |
AIC(
glm(data = merlin_city_data_fixed, formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth),
glm(data = merlin_city_data_fixed, formula = response ~ region_100km_ssm + region_50km_elevation_delta)
)
AIC(
glm(data = birdlife_city_data_fixed, formula = response ~ region_100km_ssm + region_50km_ssm + region_50km_elevation_delta + biome_name + population_growth),
glm(data = birdlife_city_data_fixed, formula = response ~ region_50km_ssm)
)
| LDG |
birdlife_data_with_lat = fetch_city_data_for('birdlife', include_city_name = T)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
model2 <- glm(formula = response ~ I(city_centre_latitude^2), data = birdlife_data_with_lat)
dat2 <- predict(model2, se.fit=T)
summary(model2)
Call:
glm(formula = response ~ I(city_centre_latitude^2), data = birdlife_data_with_lat)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.7461 -1.4878 -0.4638 1.2399 9.5839
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.4426687 0.3308024 -1.338 0.1831
I(city_centre_latitude^2) 0.0004780 0.0002725 1.754 0.0817 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 6.267835)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 846.16 on 135 degrees of freedom
AIC: 644.23
Number of Fisher Scoring iterations: 2
outside_se <- birdlife_data_with_lat[birdlife_data_with_lat$response < dat2$fit - 15* dat2$se.fit | birdlife_data_with_lat$response > dat2$fit + 15 * dat2$se.fit,]
ggplot(birdlife_data_with_lat, aes(x = city_centre_latitude, y = response)) +
geom_point(size=1) +
geom_smooth(method = "glm", formula = y ~ I(x^2)) +
geom_text(aes(label = name), data = outside_se, size = 3, position = "dodge", vjust = "inward", hjust = "inward", color = "red", angle=-15) +
geom_point(data = outside_se, color = "red") +
theme_bw() +
ylab("City Random Effect Intercept") + xlab("Latitude (of city centre)") + labs(title = "Birdlife")
Warning: Width not defined. Set with `position_dodge(width = ?)`
ggsave('city_effect_richness__output__ldg_birdlife.jpg')
Saving 7.29 x 4.51 in image
Warning: Width not defined. Set with `position_dodge(width = ?)`
merlin_data_with_lat = fetch_city_data_for('merlin', include_city_name = T)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
model2 <- glm(formula = response ~ I(city_centre_latitude^2), data = merlin_data_with_lat)
dat2 <- predict(model2, se.fit=T)
summary(model2)
Call:
glm(formula = response ~ I(city_centre_latitude^2), data = merlin_data_with_lat)
Deviance Residuals:
Min 1Q Median 3Q Max
-9.209 -2.825 -0.388 1.463 18.438
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.201e-02 5.651e-01 0.021 0.983
I(city_centre_latitude^2) -1.296e-05 4.655e-04 -0.028 0.978
(Dispersion parameter for gaussian family taken to be 18.29329)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 2469.6 on 135 degrees of freedom
AIC: 790.97
Number of Fisher Scoring iterations: 2
outside_se <- merlin_data_with_lat[merlin_data_with_lat$response < dat2$fit - 15* dat2$se.fit | merlin_data_with_lat$response > dat2$fit + 15 * dat2$se.fit,]
ggplot(merlin_data_with_lat, aes(x = city_centre_latitude, y = response)) +
geom_point(size=1) +
geom_smooth(method = "glm", formula = y ~ I(x^2)) +
geom_text(aes(label = name), data = outside_se, size = 3, position = "dodge", vjust = "inward", hjust = "inward", color = "red", angle=-15) +
geom_point(data = outside_se, color = "red") +
theme_bw() +
ylab("City Random Effect Intercept") + xlab("Latitude (of city centre)") + labs(title = "eBird")
Warning: Width not defined. Set with `position_dodge(width = ?)`
ggsave('city_effect_richness__output__ldg_merlin.jpg')
Saving 7.29 x 4.51 in image
Warning: Width not defined. Set with `position_dodge(width = ?)`